import os
import sys

from zkl_datasets import load_dataset

# Extend the import search path with the directory three levels above this
# file's own directory. Presumably this is what lets the `scripts.config`
# import below resolve when the script is run directly -- confirm against
# the repository layout.
root_dir_path = os.path.join(
    os.path.dirname(__file__),
    "../../..",
)
sys.path.append(root_dir_path)

from scripts.config import datasets_dir_path

# Load the "DuReader" dataset from the configured datasets directory and
# print a short preview of every subset it exposes.
dataset_path = os.path.join(datasets_dir_path, "DuReader")
dataset = load_dataset(dataset_path)

for subset_name, subset_data in dataset.subsets.items():
    # Header line: subset name followed by its number of samples.
    print(f"{subset_name}: {len(subset_data)}")

    # Only the first sample of each subset is shown; an empty subset
    # yields nothing beyond the header line.
    try:
        first_sample = next(iter(subset_data))
    except StopIteration:
        continue

    print(first_sample['title'])
    # Element [0][0] of the sample's 'paragraphs' field.
    leading_paragraph = first_sample['paragraphs'][0]
    print(leading_paragraph[0])
